Code
library(zoib)
library(tidyverse)
library(GGally)
library(ggpubr)
library(kableExtra)
theme_set(theme_pubr(legend = "bottom"))
# read in raw data
nnns0 <- read.csv("../NNNS_score_data.csv")library(zoib)
library(tidyverse)
library(GGally)
library(ggpubr)
library(kableExtra)
theme_set(theme_pubr(legend = "bottom"))
# read in raw data
nnns0 <- read.csv("../NNNS_score_data.csv")# clean up variable names
# remove the dots at the end of variable names
colnames(nnns0) <- gsub("\\.+$", "", colnames(nnns0))
# replace all the dots in variable names with underscores
colnames(nnns0) <- gsub("\\.+", "_", colnames(nnns0))
nnns <- nnns0 |>
filter(
# exclude neurologic or airway anomaly
Neurologic_Complication == 0, AirwayAnomalyYN == 0,
# include infants from birth to 4 weeks old
# there are two outliers with age at surgery > 30 days
Age_at_Surgery_days <= 30
) |>
# some binary variables have values of 1/2 or Y/N, recode them to 0/1
mutate(
Female = as.integer(sex_1_M_2_F == 2),
Premature = as.integer(Premature == 1),
Extubation_failure = as.integer(Extubation_failure == "Y"),
) |>
# relabel cardiac anatomy
mutate(
Cardiac_Anatomy = factor(
Cardiac_Anatomy, levels = 1:4,
labels = c(
"Single ventricle w/o arch obstruction",
"Single ventricle w/ arch obstruction",
"Two ventricle w/o arch obstruction",
"Two ventricle w/ arch obstruction"
)
),
# for model building purposes, combine the 2 levels w/o arch obstruction
Cardiac_Anatomy_collapsed = fct_collapse(
Cardiac_Anatomy,
"W/o arch obstruction" = c("Single ventricle w/o arch obstruction", "Two ventricle w/o arch obstruction"),
"Single ventricle w/ arch obstruction" = "Single ventricle w/ arch obstruction",
"Two ventricle w/ arch obstruction" = "Two ventricle w/ arch obstruction"
)
) |>
# convert date variables to date class
mutate_at(
vars("Date_PO_feeds_started", "Date_Reaching_Full_PO", "Date_Identified_as_not_yet_full_PO"),
as_date, format = "%m/%d/%Y"
)
# drop unnecessary variables
nnns <- nnns |> select(!c(
"sex_1_M_2_F", # use Female instead
"Intubated_Pre_operatively", "bypass_used", "bypass_time_min", # not of interest
"Neurologic_Complication", "AirwayAnomalyYN" # already excluded
)) \[ f(y_i|\eta_i) = \begin{cases} p_i, & y_i =0 \\ (1-p_i)q_i, & y_i =1 \\ (1 − p_i)(1 − q_i)Beta(\alpha_{i_1}, \alpha_{i_2}) &y_i \in (0, 1) \end{cases} \]
\[ \begin{aligned} logit\left(\mu^{(0,1)} = \frac{\alpha_1}{\alpha_1+ \alpha_2}\right) = \mathbf x_1 \boldsymbol \beta_1 + I_1(\mathbf z_{1,i}\gamma_{1,i}) \\ log\left(v_i = \alpha_1+ \alpha_2\right) = \mathbf x_2 \boldsymbol \beta_2 + I_2(\mathbf z_{2,i}\gamma_{2,i}) \\ logit\left(p_i\right) = \mathbf x_3 \boldsymbol \beta_3 + I_1(\mathbf z_{3,i}\gamma_{3,i}) \\ logit\left(q_i\right) = \mathbf x_4 \boldsymbol \beta_4 + I_1(\mathbf z_{4,i}\gamma_{4 ,i}) \end{aligned} \]
set.seed(11282023) #Set seed, bayesian model!
tictoc::tic()
model1 =zoib(Percent_of_feeds_taken_by_mouth_at_discharge #Response
~Pre_Op_NNNS_attention_score+
Post_Op_NNNS_attention_score+
Female+Genetic_Syndrome_or_Chromosomal_Abnormality+
Age_at_Surgery_days+Cardiac_Anatomy+
Length_of_intubation_days+Length_of_Stay_days+
Extubation_failure+GI_Complication+Premature
#x1 design matrix
| 1 #x2 design matrix- estimating variance
|Pre_Op_NNNS_attention_score+
Post_Op_NNNS_attention_score+
Female+Genetic_Syndrome_or_Chromosomal_Abnormality+
Age_at_Surgery_days+Cardiac_Anatomy+
Length_of_intubation_days+Length_of_Stay_days+
Extubation_failure+GI_Complication+Premature #x3 design matrix
|Pre_Op_NNNS_attention_score+
Post_Op_NNNS_attention_score+
Female+Genetic_Syndrome_or_Chromosomal_Abnormality+
Age_at_Surgery_days+Cardiac_Anatomy+
Length_of_intubation_days+Length_of_Stay_days+
Extubation_failure+GI_Complication+Premature, #x4 design matrix
data = nnns,
n.response = 1,
zero.inflation = TRUE,
one.inflation = TRUE,
link.mu = "logit",
link.x0 = "logit",
link.x1 = "logit",
random = 0,
n.chain = 4, # number of MCMC chains
n.iter = 10000, #number of iterations before burn/thin
n.thin = 10, # thinning period
n.burn = 5000, # burn-in period
seeds = c(11,29,20,23) #vector of seeds for chains to make reproducable
)
tictoc::toc()
saveRDS(model1, file="model1.RData")b: vector of estimates from Eqn 1; that is, g(mu) = xb*b +z*gamma
d: vector of estimates from Eqn 2; that is, log(eta) = xd*d+z*gamma
b0: vector of estimates from Eqn 3; that is, g(p0) = x0*b0 +z*gamma
b1: vector of estimates from Eqn ;4 that is, g(p1) = x1*b1+z*gamma
model1 =readRDS(file="model1.RData")
model1$coeff |> summary()
Iterations = 1:500
Thinning interval = 1
Number of chains = 4
Sample size per chain = 500
1. Empirical mean and standard deviation for each variable,
plus standard error of the mean:
Mean SD Naive SE Time-series SE
b[1] -14.54643 3.79282 0.084810 0.112361
b[2] -0.03687 0.26720 0.005975 0.007431
b[3] 2.11937 0.54855 0.012266 0.016298
b[4] -0.11380 0.74085 0.016566 0.023209
b[5] -0.20126 1.02204 0.022854 0.032359
b[6] 0.31640 0.17460 0.003904 0.006149
b[7] 2.46439 0.99358 0.022217 0.032028
b[8] -1.56983 0.66351 0.014837 0.016602
b[9] -1.45467 0.70862 0.015845 0.017257
b[10] 0.36189 0.22985 0.005140 0.007906
b[11] -0.09706 0.06148 0.001375 0.002177
b[12] 1.61259 1.71191 0.038280 0.049590
b[13] -0.60487 1.68460 0.037669 0.051732
b[14] 3.48052 1.49874 0.033513 0.042725
b0[1] -31.92729 22.81937 0.510257 3.233061
b0[2] -2.91548 1.48350 0.033172 0.043964
b0[3] -0.42010 1.47138 0.032901 0.039653
b0[4] -0.44539 2.06109 0.046087 0.046822
b0[5] -1.94152 2.86765 0.064123 0.084092
b0[6] -1.55378 0.75479 0.016878 0.025536
b0[7] 40.71062 20.73035 0.463545 3.701555
b0[8] 29.64725 20.86880 0.466640 3.261778
b0[9] 40.25138 20.61317 0.460925 3.587458
b0[10] 1.91432 1.04041 0.023264 0.039055
b0[11] 0.25466 0.18990 0.004246 0.005937
b0[12] 0.29247 6.75925 0.151141 0.185871
b0[13] -11.98803 6.76792 0.151335 0.233134
b0[14] -0.31578 3.33042 0.074470 0.075875
b1[1] -246.94180 140.67302 3.145544 12.348793
b1[2] 9.71477 16.24570 0.363265 1.473622
b1[3] 19.30748 26.58829 0.594532 2.515335
b1[4] 27.16566 32.56935 0.728273 2.517479
b1[5] -52.73324 41.81370 0.934983 3.222738
b1[6] 7.79613 4.82919 0.107984 0.466384
b1[7] 20.31243 49.05669 1.096941 5.049487
b1[8] -37.15633 45.06154 1.007607 3.822780
b1[9] 50.27294 33.73077 0.754243 2.878735
b1[10] 1.97353 9.57740 0.214157 0.861758
b1[11] -0.30451 1.63862 0.036641 0.134717
b1[12] 18.69918 76.09921 1.701630 5.621423
b1[13] -73.30273 68.26586 1.526471 6.202346
b1[14] -14.25481 66.51609 1.487345 6.268917
d 2.33198 0.47767 0.010681 0.010777
2. Quantiles for each variable:
2.5% 25% 50% 75% 97.5%
b[1] -21.98595 -16.8586 -14.62101 -12.23765 -6.81435
b[2] -0.55928 -0.2062 -0.03942 0.13607 0.46849
b[3] 0.96452 1.7884 2.11214 2.45876 3.22496
b[4] -1.51925 -0.5849 -0.12952 0.33908 1.41166
b[5] -2.31980 -0.8449 -0.16481 0.44366 1.80340
b[6] -0.04951 0.2049 0.31708 0.42510 0.66155
b[7] 0.65760 1.8160 2.44321 3.08921 4.52007
b[8] -2.77039 -2.0152 -1.60031 -1.15968 -0.17322
b[9] -2.84112 -1.9140 -1.45926 -1.00895 -0.01867
b[10] -0.09425 0.2140 0.36381 0.50881 0.81918
b[11] -0.21940 -0.1362 -0.09563 -0.05761 0.02343
b[12] -2.01627 0.6105 1.63017 2.67430 4.82337
b[13] -4.16874 -1.6223 -0.53803 0.49908 2.62893
b[14] 0.75630 2.4433 3.42364 4.48379 6.51000
b0[1] -85.17524 -45.0485 -29.19349 -15.52361 4.39287
b0[2] -6.03695 -3.8312 -2.81648 -1.89994 -0.22524
b0[3] -3.45882 -1.3550 -0.38895 0.50419 2.43841
b0[4] -4.51558 -1.8420 -0.44420 0.93104 3.54290
b0[5] -7.82432 -3.7598 -1.80934 0.12009 3.24410
b0[6] -3.24375 -2.0276 -1.48691 -1.02016 -0.26755
b0[7] 11.46390 24.9428 37.00649 52.07197 89.70499
b0[8] -0.59272 14.1864 26.01167 41.52379 79.43970
b0[9] 11.22946 24.6897 36.52107 51.38559 88.94062
b0[10] 0.22011 1.1771 1.78536 2.53813 4.27730
b0[11] -0.09228 0.1268 0.24428 0.37032 0.65696
b0[12] -13.61180 -3.7584 0.16828 4.68888 13.50700
b0[13] -26.99021 -15.8957 -11.18311 -7.22033 -1.09040
b0[14] -7.04035 -2.4400 -0.20221 1.92480 6.06073
b1[1] -555.48649 -338.5155 -237.70385 -145.17682 -2.87013
b1[2] -21.51656 -0.8320 9.39656 20.46622 42.86643
b1[3] -34.15392 1.3824 19.55716 37.91474 67.89801
b1[4] -29.65046 4.9261 23.87769 47.31136 95.48295
b1[5] -145.85491 -78.1293 -47.40276 -23.85150 16.10922
b1[6] -0.32582 4.2574 7.43429 10.84467 17.94451
b1[7] -79.16618 -13.0682 19.90946 52.82692 119.14744
b1[8] -127.64679 -68.0751 -35.85644 -5.08605 45.37185
b1[9] -10.98184 27.3263 47.85364 71.25838 125.73851
b1[10] -15.56388 -4.6997 1.64875 8.23691 21.43392
b1[11] -3.58036 -1.3097 -0.30256 0.78924 2.90186
b1[12] -153.65331 -27.8863 22.67187 73.98942 150.68028
b1[13] -213.09062 -117.9273 -72.54984 -27.02958 57.73211
b1[14] -142.41862 -61.1852 -12.44624 29.51041 126.59074
d 1.31880 2.0291 2.35622 2.65517 3.19815
traceplot(model1$coeff)autocorr.plot(model1$coeff)gelman.diag(model1$coeff)Potential scale reduction factors:
Point est. Upper C.I.
b[1] 1.013 1.04
b[2] 1.005 1.02
b[3] 1.007 1.03
b[4] 1.014 1.05
b[5] 1.026 1.08
b[6] 1.013 1.04
b[7] 1.008 1.03
b[8] 1.003 1.01
b[9] 1.000 1.00
b[10] 1.029 1.08
b[11] 1.032 1.09
b[12] 1.027 1.07
b[13] 1.009 1.03
b[14] 1.009 1.03
b0[1] 1.344 1.91
b0[2] 1.004 1.01
b0[3] 1.000 1.00
b0[4] 1.002 1.01
b0[5] 1.002 1.01
b0[6] 1.004 1.01
b0[7] 1.454 2.27
b0[8] 1.454 2.24
b0[9] 1.451 2.25
b0[10] 0.999 1.00
b0[11] 1.004 1.01
b0[12] 1.005 1.02
b0[13] 1.001 1.01
b0[14] 1.001 1.00
b1[1] 1.007 1.01
b1[2] 1.046 1.11
b1[3] 1.026 1.07
b1[4] 1.024 1.07
b1[5] 1.018 1.04
b1[6] 1.028 1.08
b1[7] 1.023 1.04
b1[8] 1.056 1.15
b1[9] 1.021 1.06
b1[10] 1.009 1.03
b1[11] 1.048 1.13
b1[12] 1.093 1.25
b1[13] 1.024 1.07
b1[14] 1.066 1.19
d 1.006 1.02
Multivariate psrf
1.36